{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "登录成功！\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.chrome.options import Options\n",
    "import time\n",
    "\n",
    "options = Options()\n",
    "options.headless = False  # 关闭无头模式，避免被检测\n",
    "driver = webdriver.Chrome(options=options)\n",
    "\n",
    "# 访问 Pixiv 并设置 cookies\n",
    "driver.get(\"https://www.pixiv.net/\")\n",
    "time.sleep(3)\n",
    "driver.add_cookie({\"name\": \"PHPSESSID\", \"value\": \"你的PHPSESSID值\"})\n",
    "\n",
    "# 刷新页面，绕过登录\n",
    "driver.refresh()\n",
    "time.sleep(5)\n",
    "print(\"登录成功！\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "已获取第 1 页的链接\n",
      "已获取第 2 页的链接\n",
      "已获取第 3 页的链接\n",
      "已获取第 4 页的链接\n",
      "已获取第 5 页的链接\n",
      "All links have been collected\n"
     ]
    }
   ],
   "source": [
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.common.keys import Keys\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "import time\n",
    "import bs4\n",
    "\n",
    "# 获取 Pixiv `series` 页面所有作品链接\n",
    "page_links = []\n",
    "def get_pixiv_series_images(url):\n",
    "    driver.get(url)\n",
    "    time.sleep(3)\n",
    "    \n",
    "    for _ in range(2):  # 滚动加载更多内容\n",
    "        driver.execute_script(\"window.scrollTo(0, document.body.scrollHeight);\")\n",
    "        time.sleep(2)\n",
    "    \n",
    "    soup = bs4.BeautifulSoup(driver.page_source, \"html.parser\")\n",
    "    links = soup.find_all(\"a\", class_=\"sc-d00cb9ca-0 jmvyWD sc-5a760b36-6 bsJslY gtm-gtm-manga-series-thumbnail\")\n",
    "    for link in links:\n",
    "        href = link.get(\"href\")\n",
    "        if href and href not in page_links:\n",
    "            page_links.append(\"https://www.pixiv.net\" + href)\n",
    "\n",
    "# 获取所有系列页链接\n",
    "for i in range(1, 6):\n",
    "    url = f\"https://www.pixiv.net/user/14601125/series/217742?p={i}#seriesContents\"\n",
    "    get_pixiv_series_images(url)\n",
    "    print(f\"已获取第 {i} 页的链接\")\n",
    "    time.sleep(2)\n",
    "print(\"All links have been collected\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "ename": "InvalidSessionIdException",
     "evalue": "Message: invalid session id\nStacktrace:\n\tGetHandleVerifier [0x00007FF67F054C25+3179557]\n\t(No symbol) [0x00007FF67ECB88A0]\n\t(No symbol) [0x00007FF67EB48FFC]\n\t(No symbol) [0x00007FF67EB8F8DF]\n\t(No symbol) [0x00007FF67EBC7AF2]\n\t(No symbol) [0x00007FF67EBC244E]\n\t(No symbol) [0x00007FF67EBC14F9]\n\t(No symbol) [0x00007FF67EB155E5]\n\tGetHandleVerifier [0x00007FF67F0B67CD+3579853]\n\tGetHandleVerifier [0x00007FF67F0CD1D2+3672530]\n\tGetHandleVerifier [0x00007FF67F0C2153+3627347]\n\tGetHandleVerifier [0x00007FF67EE2092A+868650]\n\t(No symbol) [0x00007FF67ECC2FFF]\n\t(No symbol) [0x00007FF67EB141FF]\n\tGetHandleVerifier [0x00007FF67F13FA28+4141608]\n\tBaseThreadInitThunk [0x00007FFFD47EE8D7+23]\n\tRtlUserThreadStart [0x00007FFFD5D5BF2C+44]\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mInvalidSessionIdException\u001b[0m                 Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[14], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mdriver\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_cookies\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)  \u001b[38;5;66;03m# 打印 cookies\u001b[39;00m\n",
      "File \u001b[1;32mc:\\Users\\16673\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:619\u001b[0m, in \u001b[0;36mWebDriver.get_cookies\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    610\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_cookies\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[\u001b[38;5;28mdict\u001b[39m]:\n\u001b[0;32m    611\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a set of dictionaries, corresponding to cookies visible in\u001b[39;00m\n\u001b[0;32m    612\u001b[0m \u001b[38;5;124;03m    the current session.\u001b[39;00m\n\u001b[0;32m    613\u001b[0m \n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    617\u001b[0m \u001b[38;5;124;03m            driver.get_cookies()\u001b[39;00m\n\u001b[0;32m    618\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m--> 619\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mCommand\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mGET_ALL_COOKIES\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
      "File \u001b[1;32mc:\\Users\\16673\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:384\u001b[0m, in \u001b[0;36mWebDriver.execute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m    382\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_executor\u001b[38;5;241m.\u001b[39mexecute(driver_command, params)\n\u001b[0;32m    383\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response:\n\u001b[1;32m--> 384\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merror_handler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheck_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    385\u001b[0m     response[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_unwrap_value(response\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[0;32m    386\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
      "File \u001b[1;32mc:\\Users\\16673\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py:232\u001b[0m, in \u001b[0;36mErrorHandler.check_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m    230\u001b[0m         alert_text \u001b[38;5;241m=\u001b[39m value[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124malert\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    231\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m exception_class(message, screen, stacktrace, alert_text)  \u001b[38;5;66;03m# type: ignore[call-arg]  # mypy is not smart enough here\u001b[39;00m\n\u001b[1;32m--> 232\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception_class(message, screen, stacktrace)\n",
      "\u001b[1;31mInvalidSessionIdException\u001b[0m: Message: invalid session id\nStacktrace:\n\tGetHandleVerifier [0x00007FF67F054C25+3179557]\n\t(No symbol) [0x00007FF67ECB88A0]\n\t(No symbol) [0x00007FF67EB48FFC]\n\t(No symbol) [0x00007FF67EB8F8DF]\n\t(No symbol) [0x00007FF67EBC7AF2]\n\t(No symbol) [0x00007FF67EBC244E]\n\t(No symbol) [0x00007FF67EBC14F9]\n\t(No symbol) [0x00007FF67EB155E5]\n\tGetHandleVerifier [0x00007FF67F0B67CD+3579853]\n\tGetHandleVerifier [0x00007FF67F0CD1D2+3672530]\n\tGetHandleVerifier [0x00007FF67F0C2153+3627347]\n\tGetHandleVerifier [0x00007FF67EE2092A+868650]\n\t(No symbol) [0x00007FF67ECC2FFF]\n\t(No symbol) [0x00007FF67EB141FF]\n\tGetHandleVerifier [0x00007FF67F13FA28+4141608]\n\tBaseThreadInitThunk [0x00007FFFD47EE8D7+23]\n\tRtlUserThreadStart [0x00007FFFD5D5BF2C+44]\n"
     ]
    }
   ],
   "source": [
    "print(driver.get_cookies())  # 打印 cookies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://www.pixiv.net/artworks/123828051\n"
     ]
    },
    {
     "ename": "InvalidSessionIdException",
     "evalue": "Message: invalid session id\nStacktrace:\n\tGetHandleVerifier [0x00007FF67F054C25+3179557]\n\t(No symbol) [0x00007FF67ECB88A0]\n\t(No symbol) [0x00007FF67EB48FFC]\n\t(No symbol) [0x00007FF67EB8F8DF]\n\t(No symbol) [0x00007FF67EBC7AF2]\n\t(No symbol) [0x00007FF67EBC244E]\n\t(No symbol) [0x00007FF67EBC14F9]\n\t(No symbol) [0x00007FF67EB155E5]\n\tGetHandleVerifier [0x00007FF67F0B67CD+3579853]\n\tGetHandleVerifier [0x00007FF67F0CD1D2+3672530]\n\tGetHandleVerifier [0x00007FF67F0C2153+3627347]\n\tGetHandleVerifier [0x00007FF67EE2092A+868650]\n\t(No symbol) [0x00007FF67ECC2FFF]\n\t(No symbol) [0x00007FF67EB141FF]\n\tGetHandleVerifier [0x00007FF67F13FA28+4141608]\n\tBaseThreadInitThunk [0x00007FFFD47EE8D7+23]\n\tRtlUserThreadStart [0x00007FFFD5D5BF2C+44]\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mInvalidSessionIdException\u001b[0m                 Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[13], line 40\u001b[0m\n\u001b[0;32m     38\u001b[0m \u001b[38;5;66;03m# 获取所有作品详情页图片链接\u001b[39;00m\n\u001b[0;32m     39\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m link \u001b[38;5;129;01min\u001b[39;00m page_links:\n\u001b[1;32m---> 40\u001b[0m     \u001b[43mget_image_links\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlink\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m     42\u001b[0m driver\u001b[38;5;241m.\u001b[39mquit()\n",
      "Cell \u001b[1;32mIn[13], line 9\u001b[0m, in \u001b[0;36mget_image_links\u001b[1;34m(url)\u001b[0m\n\u001b[0;32m      7\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_image_links\u001b[39m(url):\n\u001b[0;32m      8\u001b[0m     \u001b[38;5;28mprint\u001b[39m(url)\n\u001b[1;32m----> 9\u001b[0m     \u001b[43mdriver\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m     10\u001b[0m     time\u001b[38;5;241m.\u001b[39msleep(\u001b[38;5;241m3\u001b[39m)\n\u001b[0;32m     12\u001b[0m     \u001b[38;5;66;03m# 点击 \"查看全部\" 按钮\u001b[39;00m\n",
      "File \u001b[1;32mc:\\Users\\16673\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:393\u001b[0m, in \u001b[0;36mWebDriver.get\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m    391\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget\u001b[39m(\u001b[38;5;28mself\u001b[39m, url: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m    392\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Loads a web page in the current browser session.\"\"\"\u001b[39;00m\n\u001b[1;32m--> 393\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mCommand\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mGET\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43murl\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mc:\\Users\\16673\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:384\u001b[0m, in \u001b[0;36mWebDriver.execute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m    382\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_executor\u001b[38;5;241m.\u001b[39mexecute(driver_command, params)\n\u001b[0;32m    383\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response:\n\u001b[1;32m--> 384\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merror_handler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheck_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    385\u001b[0m     response[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_unwrap_value(response\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[0;32m    386\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
      "File \u001b[1;32mc:\\Users\\16673\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py:232\u001b[0m, in \u001b[0;36mErrorHandler.check_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m    230\u001b[0m         alert_text \u001b[38;5;241m=\u001b[39m value[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124malert\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    231\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m exception_class(message, screen, stacktrace, alert_text)  \u001b[38;5;66;03m# type: ignore[call-arg]  # mypy is not smart enough here\u001b[39;00m\n\u001b[1;32m--> 232\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception_class(message, screen, stacktrace)\n",
      "\u001b[1;31mInvalidSessionIdException\u001b[0m: Message: invalid session id\nStacktrace:\n\tGetHandleVerifier [0x00007FF67F054C25+3179557]\n\t(No symbol) [0x00007FF67ECB88A0]\n\t(No symbol) [0x00007FF67EB48FFC]\n\t(No symbol) [0x00007FF67EB8F8DF]\n\t(No symbol) [0x00007FF67EBC7AF2]\n\t(No symbol) [0x00007FF67EBC244E]\n\t(No symbol) [0x00007FF67EBC14F9]\n\t(No symbol) [0x00007FF67EB155E5]\n\tGetHandleVerifier [0x00007FF67F0B67CD+3579853]\n\tGetHandleVerifier [0x00007FF67F0CD1D2+3672530]\n\tGetHandleVerifier [0x00007FF67F0C2153+3627347]\n\tGetHandleVerifier [0x00007FF67EE2092A+868650]\n\t(No symbol) [0x00007FF67ECC2FFF]\n\t(No symbol) [0x00007FF67EB141FF]\n\tGetHandleVerifier [0x00007FF67F13FA28+4141608]\n\tBaseThreadInitThunk [0x00007FFFD47EE8D7+23]\n\tRtlUserThreadStart [0x00007FFFD5D5BF2C+44]\n"
     ]
    }
   ],
   "source": [
    "def download_image(url, name):\n",
    "    driver.get(url)\n",
    "    with open(name, \"wb\") as f:\n",
    "        f.write(driver.find_element(By.TAG_NAME, \"img\").screenshot_as_png)\n",
    "\n",
    "# 获取作品详情页中的图片链接\n",
    "def get_image_links(url):\n",
    "    print(url)\n",
    "    driver.get(url)\n",
    "    time.sleep(3)\n",
    "    \n",
    "    # 点击 \"查看全部\" 按钮\n",
    "    \"\"\"<div class=\"sc-emr523-2 wEKy\">阅读作品 </div>\n",
    "    //*[@id=\"root\"]/div[2]/div/div[3]/div/div/div[1]/main/section/div[1]/div/div[5]/div/div/button/div[2]\"\"\"\n",
    "    driver.find_element(By.XPATH, '//*[@id=\"root\"]/div[2]/div/div[3]/div/div/div[1]/main/section/div[1]/div/div[5]/div/div/button/div[2]').click()\n",
    "\n",
    "    # # 模拟滚动加载更多内容\n",
    "    # for _ in range(2):  # 根据需要调整滚动次数\n",
    "    #     driver.execute_script(\"window.scrollTo(0, document.body.scrollHeight);\")\n",
    "    #     time.sleep(2)\n",
    "\n",
    "    # 滚动加载全部内容\n",
    "    for _ in range(5):\n",
    "        driver.find_element(By.TAG_NAME, \"body\").send_keys(Keys.PAGE_DOWN)\n",
    "        time.sleep(1)\n",
    "    \n",
    "    # 获取页面 HTML\n",
    "    soup = bs4.BeautifulSoup(driver.page_source, \"html.parser\")\n",
    "    image_links = soup.find_all(\"a\", class_=\"gtm-expand-full-size-illust\")\n",
    "    for i, link in enumerate(image_links):\n",
    "        href = link.get(\"href\")\n",
    "        name = link.get(\"href\").split(\"/\")[-1]\n",
    "        download_image(href, name) # 下载图片\n",
    "        print(f\"已下载第 {i+1}/{len(image_links)} 张图片\")\n",
    "        \n",
    "    return image_links\n",
    "\n",
    "# 获取所有作品详情页图片链接\n",
    "for link in page_links:\n",
    "    get_image_links(link)\n",
    "\n",
    "driver.quit()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
